Contexto
Esta serie consta del valor FOB en dólares de las importaciones que llegan a los puertos de Colombia vía marítima y su destino final es la ciudad de Bogotá, en el periodo transcurrido entre enero del 2012 hasta diciembre de 2021, la serie es de tipo mensual.
Definición: El valor FOB en dólares de la mercancía, es valor de la mercancía en el momento que se carga a bordo del medio de transporte, en este caso el marítimo.
La serie consta de 120 observaciones, lo que corresponde a los 10 transcurridos desde el 2012 hasta el 2021
# 1.000'000.000
vafodo <- ts(importaciones[,3], start = c(2012, 01), frequency =12)/1000000000
plot(vafodo, ylab = "Miles de millones de dólares", main = "Valor FOB", lw =2)
Visualmente vemos que la serie presenta una tendencia, una varianza no constante y
1. Parte descriptiva
1.1 Estabilización de la varianza
Transformación de Box-Cox
serie <- vafodo
a <- MASS::boxcox(lm(serie ~ 1), seq(-1, 1, length = 50))
BC.m <- a$x[which.max(a$y)]
BC.f <- forecast::BoxCox.lambda(serie, method = "loglik",
lower = -1,
upper = 1)
# Transformación logarítmica
lserie <- log(vafodo)
a <- MASS::boxcox(lm(lserie ~ 1), seq(-2, 2, length = 50))
BC.ml <- a$x[which.max(a$y)]
BC.fl <- forecast::BoxCox.lambda(lserie, method = "loglik",
lower = -2,
upper = 2)
c(BC.f, BC.fl, BC.m, BC.ml)
## [1] -0.2500000 -0.2000000 -0.1919192 -0.1414141
Los valores de \(\lambda\) obtenidos por el método de Box-Cox tanto en el paquete MASS, como en el paquete forecast, son diferentes de 1 e inferiorese a 0, además el intervalo de confianza para \(\lambda\) no captura el 1, por lo que se usará la transformación logarítmica. Una vez aplicada, notamos que el intervalo de confianza caputra al 1, por otra parte los valores de \(\lambda\) siguen siendo inferiores a 0.
1.2 Estimación de la tendencia
fit_lserie <- lm(lserie ~time(lserie), na.action = NULL)
summary(fit_lserie)
##
## Call:
## lm(formula = lserie ~ time(lserie), na.action = NULL)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6748 -0.1490 0.0355 0.2354 0.5725
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.251e+02 1.920e+01 -16.93 <2e-16 ***
## time(lserie) 1.630e-01 9.521e-03 17.12 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3011 on 118 degrees of freedom
## Multiple R-squared: 0.713, Adjusted R-squared: 0.7106
## F-statistic: 293.2 on 1 and 118 DF, p-value: < 2.2e-16
# Grafico
plot(lserie, lw = 2, main = "Valor FOB en escala log", ylab = "Log de miles de millones de dólares")
abline(fit_lserie, col = "blue", lw = 2)
# Eliminando la tendencia
lserie.sin.tend <- lserie - predict(fit_lserie)
plot(lserie.sin.tend, main = "Valor FOB en escala log sin tendencia", lw =2)
acf(lserie, lag.max = length(lserie))
acf(lserie.sin.tend, lag.max = length(lserie.sin.tend))
1.3 Promedio móvil
descomposicion_lserie <- decompose(lserie)
plot(descomposicion_lserie)
1.4 Tendencia desde el STL
indice_lserie <- as.Date(as.yearmon(tk_index(lserie)))
indice_lserie1 <- yearmonth(as.yearmon(tk_index(lserie)))
# Forma alternativa de extraer el indice
df_lserie <- data.frame(Fecha = indice_lserie,
lserie = as.matrix(lserie))
tibble_lserie <- tibble(df_lserie)
tsibble_lserie <- as_tsibble(df_lserie)
# Primera aproximación al ajuste STL
# escala log
tsibble_lserie %>%
timetk::plot_time_series(Fecha, lserie,
.interactive = TRUE,
.plotly_slider = TRUE)
# Ajuste STL
# escala log
tibble_lserie %>%
mutate(lserie_ajust = smooth_vec(lserie,
span = 0.3,
degree =2)
)
## # A tibble: 120 × 3
## Fecha lserie lserie_ajust
## <date> <dbl> <dbl>
## 1 2012-01-01 2.76 2.90
## 2 2012-02-01 2.88 2.92
## 3 2012-03-01 2.98 2.94
## 4 2012-04-01 2.77 2.95
## 5 2012-05-01 3.16 2.97
## 6 2012-06-01 3.16 2.99
## 7 2012-07-01 3.13 3.00
## 8 2012-08-01 3.18 3.02
## 9 2012-09-01 3.06 3.04
## 10 2012-10-01 2.99 3.05
## # ℹ 110 more rows
# Ajuste STL moviendo los parámetros
# escala log
tsibble_lserie %>% mutate(
lserie_ajus = smooth_vec(lserie, span = 0.3, degree = 2)) %>%
ggplot(aes(Fecha, lserie)) +
geom_line(size =1.05)+
geom_line(aes(y = lserie_ajus), color = "blue", size =1.05) +
theme_bw()
1.4.1 STL Tendencia y estacionalidad
tsibble_lserie <- as_tsibble(lserie)
tsibble_lserie %>%
model(
STL(value ~ trend() +
season(window = "periodic"),
robust = TRUE)) %>%
components() %>%
autoplot() +
theme_minimal()
1.5 Diferencia Ordinaria
# escala log
tsibble_lserie|>mutate(
diff_lserie = tsibble::difference(value, lag = 1,
differences = 1))|>
autoplot(.vars = diff_lserie, size = 1.05) +
labs(subtitle = "Cambios en escala log del valor FOB") +
theme_bw()
tsibble_lserie <- tsibble_lserie|>mutate(
diff_lserie = tsibble::difference(value, lag = 1,
difference = 1))
# Diferenciando basado en el objeto tibble
tibble_lserie %>%
mutate(diff_lserie = lserie - lag(lserie)) %>%
plot_time_series(Fecha, diff_lserie)
tibble_lserie <- tibble_lserie %>%
mutate(diff_lserie = lserie - lag(lserie))
dlserie <- diff(lserie)
1.6 Relaciones no lineales dispersión
par(mar = c(3,2,3,2))
astsa::lag1.plot(dlserie, 12, corr = T)
1.7 ACF
acf(dlserie, 48, main = "Serie diferenciada y con logaritmo del valor FOB")
pacf(dlserie, 48)
1.8 Índice AMI
par(mar = c(3,2,3,2))
astsa::lag1.plot(lserie, 12, corr = F)
nonlinearTseries::mutualInformation(lserie, lag.max = 100,
n.partitions = 50,
units = "Bits",
do.plot = TRUE)
## $time.lag
## [1] 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## [19] 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
## [37] 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
## [55] 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
## [73] 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
## [91] 90 91 92 93 94 95 96 97 98 99 100
##
## $mutual.information
## [1] 4.862322 3.052912 2.910639 2.853770 2.920942 2.891607 2.907651 2.827147
## [9] 2.925675 2.864164 2.945057 2.907872 2.930455 2.937367 2.914099 2.921245
## [17] 2.897458 2.858685 2.728650 2.655587 2.641206 2.651833 2.831493 2.653982
## [25] 2.687045 2.755120 2.669964 2.698785 2.654698 2.684328 2.620457 2.686898
## [33] 2.690152 2.653327 2.639095 2.577666 2.568432 2.634660 2.650707 2.454465
## [41] 2.437976 2.415092 2.475064 2.416767 2.337317 2.276132 2.396371 2.311431
## [49] 2.352713 2.306940 2.231579 2.096361 2.185558 2.277735 2.168338 2.190601
## [57] 2.096567 2.011887 1.924846 1.933693 1.876562 1.929006 1.939626 1.783764
## [65] 1.850553 2.089983 1.879139 1.926220 1.970048 1.892796 1.973020 1.991408
## [73] 2.021388 1.989870 1.978963 2.251709 2.071896 2.081328 2.122268 2.166044
## [81] 2.207717 2.342629 2.320075 2.513545 2.468303 2.236186 2.380430 2.596091
## [89] 2.451598 2.517136 2.321928 2.476648 2.340975 2.253077 2.179329 2.402292
## [97] 2.312907 2.239678 2.344462 2.271873 2.395462
##
## $units
## [1] "Bits"
##
## $n.partitions
## [1] 50
##
## attr(,"class")
## [1] "mutualInf"
# sobre la serie diferenciada
nonlinearTseries::mutualInformation(dlserie, lag.max = 100,
n.partitions = 50,
units = "Bits",
do.plot = TRUE)
## $time.lag
## [1] 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## [19] 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
## [37] 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
## [55] 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
## [73] 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
## [91] 90 91 92 93 94 95 96 97 98 99 100
##
## $mutual.information
## [1] 3.908374 1.421193 1.455035 1.402739 1.493103 1.555906 1.622355 1.429019
## [9] 1.455684 1.472721 1.553306 1.524770 1.539951 1.462847 1.493069 1.532704
## [17] 1.674945 1.637317 1.606369 1.709394 1.643386 1.664606 1.578555 1.629250
## [25] 1.658723 1.653978 1.706768 1.625568 1.645700 1.740113 1.544253 1.622424
## [33] 1.684184 1.678909 1.651387 1.633178 1.733924 1.629698 1.774529 1.713856
## [41] 1.833378 1.693334 1.762807 1.807361 1.655236 1.800483 1.853089 1.751038
## [49] 1.714328 1.654343 1.759697 1.794311 1.768562 1.839690 1.937591 1.927800
## [57] 1.721239 1.820051 1.912216 1.967309 1.800184 1.839622 1.729774 1.779251
## [65] 1.825002 1.785359 1.792801 1.772154 1.735934 2.024498 1.849860 1.820850
## [73] 1.911494 1.844747 1.830496 1.857667 1.804805 2.060829 1.956765 1.879505
## [81] 2.010459 2.004286 2.163420 1.962755 1.773337 1.823701 1.839891 2.023176
## [89] 2.049709 2.022580 2.166978 2.020244 2.327151 2.314320 2.222683 2.292481
## [97] 2.305321 2.231270 2.320423 2.370951 2.299530
##
## $units
## [1] "Bits"
##
## $n.partitions
## [1] 50
##
## attr(,"class")
## [1] "mutualInf"
1.9 Exploración de la Estacionalidad
monthplot(dlserie)
tsibble_lserie %>%
na.omit()|>gg_subseries(diff_lserie,period=12) +
theme_minimal()
tibble_lserie %>%na.omit()|>
mutate(
Mes = str_c("", as.character(lubridate::month(Fecha,label=TRUE)))
) %>%
plot_time_series(
.date_var = Fecha,
.value = diff_lserie,
.facet_vars = Mes,
.facet_ncol = 4,
.color_var = Mes,
.facet_scales = "fixed",
.interactive = FALSE,
.legend_show = FALSE,
.smooth = FALSE
)
ggseasonplot(dlserie)
1.9.1 Gráfico de cajas
tibble_lserie %>%
na.omit() %>%
plot_seasonal_diagnostics(.date_var = Fecha,.value = diff_lserie,
.feature_set = c("month.lbl"),.geom="boxplot")
ggplot(tibble_lserie %>%
na.omit()|>
mutate(Mes = str_c("Mes ", as.character(lubridate::month(Fecha)))),
aes(x = diff_lserie)) +
geom_density(aes(fill = Mes)) +
ggtitle("Estimación de la densidad vía Kernel por mes") +
facet_grid(rows = vars(as.factor(Mes)))
1.9.2 Periodograma
spectrum(as.numeric(dlserie))
Periodgramadlserie <- spectrum(as.numeric(dlserie),log='no')
ubicacionlserie=which.max(Periodgramadlserie$spec)
sprintf("El valor de la frecuencia donde se máximiza el periodograma para la serie es: %s",Periodgramadlserie$freq[ubicacionlserie])
## [1] "El valor de la frecuencia donde se máximiza el periodograma para la serie es: 0.391666666666667"
sprintf("El periodo correspondiente es aproximadamente: %s",1/Periodgramadlserie$freq[ubicacionlserie])
## [1] "El periodo correspondiente es aproximadamente: 2.5531914893617"
1.9.3 Ajuste de la estacionalidad con componentes de Fourier y Dummy
tsibble_serie <- as_tsibble(serie)
# Variables Dummy y Armónicos
Armonicos=TSA::harmonic(serie, m = 1)
# Armónicos
forecast::fourier(serie,K=1)
FALSE S1-12 C1-12
FALSE [1,] 0.5000000 0.8660254
FALSE [2,] 0.8660254 0.5000000
FALSE [3,] 1.0000000 0.0000000
FALSE [4,] 0.8660254 -0.5000000
FALSE [5,] 0.5000000 -0.8660254
FALSE [6,] 0.0000000 -1.0000000
FALSE [7,] -0.5000000 -0.8660254
FALSE [8,] -0.8660254 -0.5000000
FALSE [9,] -1.0000000 0.0000000
FALSE [10,] -0.8660254 0.5000000
FALSE [11,] -0.5000000 0.8660254
FALSE [12,] 0.0000000 1.0000000
FALSE [13,] 0.5000000 0.8660254
FALSE [14,] 0.8660254 0.5000000
FALSE [15,] 1.0000000 0.0000000
FALSE [16,] 0.8660254 -0.5000000
FALSE [17,] 0.5000000 -0.8660254
FALSE [18,] 0.0000000 -1.0000000
FALSE [19,] -0.5000000 -0.8660254
FALSE [20,] -0.8660254 -0.5000000
FALSE [21,] -1.0000000 0.0000000
FALSE [22,] -0.8660254 0.5000000
FALSE [23,] -0.5000000 0.8660254
FALSE [24,] 0.0000000 1.0000000
FALSE [25,] 0.5000000 0.8660254
FALSE [26,] 0.8660254 0.5000000
FALSE [27,] 1.0000000 0.0000000
FALSE [28,] 0.8660254 -0.5000000
FALSE [29,] 0.5000000 -0.8660254
FALSE [30,] 0.0000000 -1.0000000
FALSE [31,] -0.5000000 -0.8660254
FALSE [32,] -0.8660254 -0.5000000
FALSE [33,] -1.0000000 0.0000000
FALSE [34,] -0.8660254 0.5000000
FALSE [35,] -0.5000000 0.8660254
FALSE [36,] 0.0000000 1.0000000
FALSE [37,] 0.5000000 0.8660254
FALSE [38,] 0.8660254 0.5000000
FALSE [39,] 1.0000000 0.0000000
FALSE [40,] 0.8660254 -0.5000000
FALSE [41,] 0.5000000 -0.8660254
FALSE [42,] 0.0000000 -1.0000000
FALSE [43,] -0.5000000 -0.8660254
FALSE [44,] -0.8660254 -0.5000000
FALSE [45,] -1.0000000 0.0000000
FALSE [46,] -0.8660254 0.5000000
FALSE [47,] -0.5000000 0.8660254
FALSE [48,] 0.0000000 1.0000000
FALSE [49,] 0.5000000 0.8660254
FALSE [50,] 0.8660254 0.5000000
FALSE [51,] 1.0000000 0.0000000
FALSE [52,] 0.8660254 -0.5000000
FALSE [53,] 0.5000000 -0.8660254
FALSE [54,] 0.0000000 -1.0000000
FALSE [55,] -0.5000000 -0.8660254
FALSE [56,] -0.8660254 -0.5000000
FALSE [57,] -1.0000000 0.0000000
FALSE [58,] -0.8660254 0.5000000
FALSE [59,] -0.5000000 0.8660254
FALSE [60,] 0.0000000 1.0000000
FALSE [61,] 0.5000000 0.8660254
FALSE [62,] 0.8660254 0.5000000
FALSE [63,] 1.0000000 0.0000000
FALSE [64,] 0.8660254 -0.5000000
FALSE [65,] 0.5000000 -0.8660254
FALSE [66,] 0.0000000 -1.0000000
FALSE [67,] -0.5000000 -0.8660254
FALSE [68,] -0.8660254 -0.5000000
FALSE [69,] -1.0000000 0.0000000
FALSE [70,] -0.8660254 0.5000000
FALSE [71,] -0.5000000 0.8660254
FALSE [72,] 0.0000000 1.0000000
FALSE [73,] 0.5000000 0.8660254
FALSE [74,] 0.8660254 0.5000000
FALSE [75,] 1.0000000 0.0000000
FALSE [76,] 0.8660254 -0.5000000
FALSE [77,] 0.5000000 -0.8660254
FALSE [78,] 0.0000000 -1.0000000
FALSE [79,] -0.5000000 -0.8660254
FALSE [80,] -0.8660254 -0.5000000
FALSE [81,] -1.0000000 0.0000000
FALSE [82,] -0.8660254 0.5000000
FALSE [83,] -0.5000000 0.8660254
FALSE [84,] 0.0000000 1.0000000
FALSE [85,] 0.5000000 0.8660254
FALSE [86,] 0.8660254 0.5000000
FALSE [87,] 1.0000000 0.0000000
FALSE [88,] 0.8660254 -0.5000000
FALSE [89,] 0.5000000 -0.8660254
FALSE [90,] 0.0000000 -1.0000000
FALSE [91,] -0.5000000 -0.8660254
FALSE [92,] -0.8660254 -0.5000000
FALSE [93,] -1.0000000 0.0000000
FALSE [94,] -0.8660254 0.5000000
FALSE [95,] -0.5000000 0.8660254
FALSE [96,] 0.0000000 1.0000000
FALSE [97,] 0.5000000 0.8660254
FALSE [98,] 0.8660254 0.5000000
FALSE [99,] 1.0000000 0.0000000
FALSE [100,] 0.8660254 -0.5000000
FALSE [101,] 0.5000000 -0.8660254
FALSE [102,] 0.0000000 -1.0000000
FALSE [103,] -0.5000000 -0.8660254
FALSE [104,] -0.8660254 -0.5000000
FALSE [105,] -1.0000000 0.0000000
FALSE [106,] -0.8660254 0.5000000
FALSE [107,] -0.5000000 0.8660254
FALSE [108,] 0.0000000 1.0000000
FALSE [109,] 0.5000000 0.8660254
FALSE [110,] 0.8660254 0.5000000
FALSE [111,] 1.0000000 0.0000000
FALSE [112,] 0.8660254 -0.5000000
FALSE [113,] 0.5000000 -0.8660254
FALSE [114,] 0.0000000 -1.0000000
FALSE [115,] -0.5000000 -0.8660254
FALSE [116,] -0.8660254 -0.5000000
FALSE [117,] -1.0000000 0.0000000
FALSE [118,] -0.8660254 0.5000000
FALSE [119,] -0.5000000 0.8660254
FALSE [120,] 0.0000000 1.0000000
tiempo=1
j=1
# Gráfica de los armónicos
harmonics = fourier(serie, K = 6)
par(mar = c(1,4,1,1), mfrow = c(6,2))
for(i in 1:ncol(harmonics)){
plot(harmonics[,i], type = 'l', xlab = "Time", ylab = colnames(harmonics)[i])
}
par(mar = rep(4, 4), mfrow=c(1,1))
diff_tsibble <- tsibble_serie|>
mutate(logdiff_serie = difference(log(value)))|>
select(logdiff_serie)
# Explore diferentes valores de K
Modelo_serie_diff<-diff_tsibble|>
model(Fourier1seriediff = ARIMA(logdiff_serie ~ fourier(K=2) +
pdq(0, 0, 0) + PDQ(0, 0, 0)))
real_ajustado1 <- diff_tsibble %>%
left_join(fitted(Modelo_serie_diff,by=index)) %>%
select(-.model)
real_ajustado1 %>%
autoplot() +
geom_line(data=real_ajustado1,
aes(y=logdiff_serie, colour="real"))+
geom_line(data=real_ajustado1,
aes(y=.fitted, colour="ajustado"))+
scale_color_manual(name = "real/ajustado",
values = c("real" = "black", "ajustado" = "red"))
# Ajuste Dummy
Modelo_serie_diff_Dummy<-diff_tsibble|>model(
DummyAirdiff=ARIMA(logdiff_serie~season()+pdq(0, 0, 0) + PDQ(0, 0, 0))
)
Modelo_serie_diff_Dummy<-diff_tsibble%>%left_join(fitted(Modelo_serie_diff,by=index))%>%select(-.model)
Modelo_serie_diff_Dummy %>%
autoplot() +
geom_line(data=Modelo_serie_diff_Dummy,aes(y=logdiff_serie,colour="real"))+
geom_line(data=Modelo_serie_diff_Dummy,aes(y=.fitted,colour="ajustado"))+
scale_color_manual(name = "real/ajustado", values = c("real" = "black", "ajustado" = "red"))
#### Varios modelos la mismo tiempo
ajuste_final_models<-diff_tsibble%>%model(
Fourier1Airdiff=ARIMA(logdiff_serie~fourier(K=1)+pdq(0, 0, 0) + PDQ(0, 0, 0)),
Fourier2Airdiff=ARIMA(logdiff_serie~fourier(K=2)+pdq(0, 0, 0) + PDQ(0, 0, 0)),
Fourier3Airdiff=ARIMA(logdiff_serie~fourier(K=3)+pdq(0, 0, 0) + PDQ(0, 0, 0)),
DummyAirdiff=ARIMA(logdiff_serie~season()+pdq(0, 0, 0) + PDQ(0, 0, 0))
)
glance(ajuste_final_models)
FALSE # A tibble: 4 × 8
FALSE .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
FALSE <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
FALSE 1 Fourier1Airdiff 0.0429 19.0 -32.1 -31.9 -23.7 <cpl [0]> <cpl [0]>
FALSE 2 Fourier2Airdiff 0.0428 20.2 -30.4 -29.8 -16.4 <cpl [0]> <cpl [0]>
FALSE 3 Fourier3Airdiff 0.0413 23.4 -32.7 -31.7 -13.2 <cpl [0]> <cpl [0]>
FALSE 4 DummyAirdiff 0.0409 26.5 -29.0 -26.1 4.42 <cpl [0]> <cpl [0]>
ajuste_final_models %>%
select(Fourier1Airdiff)%>%coef()
FALSE # A tibble: 2 × 6
FALSE .model term estimate std.error statistic p.value
FALSE <chr> <chr> <dbl> <dbl> <dbl> <dbl>
FALSE 1 Fourier1Airdiff fourier(K = 1)C1_12 -0.0101 0.0268 -0.377 0.706
FALSE 2 Fourier1Airdiff fourier(K = 1)S1_12 0.0288 0.0266 1.08 0.282
Modelo_serie_diff_models<-diff_tsibble%>%left_join(fitted(ajuste_final_models)|>group_by(.model)%>%
pivot_wider(names_from = .model, values_from = .fitted))
Modelo_serie_diff_models %>%
autoplot() +
geom_line(data=Modelo_serie_diff_models,aes(y=logdiff_serie,colour="real"))+
geom_line(data=Modelo_serie_diff_models,aes(y=Fourier1Airdiff,colour="ajustadoFourier1"))+
geom_line(data=Modelo_serie_diff_models,aes(y=Fourier2Airdiff,colour="ajustadoFourier2"))+
geom_line(data=Modelo_serie_diff_models,aes(y=Fourier3Airdiff,colour="ajustadoFourier3"))+
geom_line(data=Modelo_serie_diff_models,aes(y=DummyAirdiff,colour="ajustadoDummy")) +
scale_color_manual(name = "real/ajustado", values = c("real" = "black", "ajustadoFourier1" = "red","ajustadoFourier2" = "blue","ajustadoFourier3"="green","ajustadoDummy"="yellow"))